from BasicLibraries import *
import regressions as regression_list
from main_class import MAIN
import sys
import warnings
warnings.simplefilter("ignore", UserWarning)



# Project objects used by the rest of the script.
MN = MAIN()
reg = regression_list.RegressionList()

# Emission measure handed to most of the MN.* calls below; it is also part of
# every output file name through `file_label`.
emission_type = 'DirectControl'

# Channels analysed in the main, per-year and per-sector runs.
strategies_names = [
    'Initiatives intensity',
    'Risk mitigation',
    'Stakeholders engagement',
    'Innovation',
    'Entropy',
]

# NOTE: this list is replaced by the `controls_` entry unpacked from
# 'data_1224.pckl' further down in the script.
controls_ = ['firm_size', 'Tangibility', 'MTB', 'turnover', 'firm_size_t1']

# Output-file suffix: '_<emission_type>_all' or '_<emission_type>_climate'.
climate_initiatives = False
file_label = '_' + emission_type + ('_climate' if climate_initiatives else '_all')


#%% Scaler type (first command-line argument)
# The value is forwarded as `scaler_type` to the MN.* calls and prefixes every
# output file name. The total-effect, size-only and diversification-only
# analyses only run when it equals 'relative'.
if len(sys.argv) < 2:
    # Fail with an explicit message instead of a bare IndexError.
    raise SystemExit("Missing argument: pass the scaler type (e.g. 'relative') as the first command-line argument")
SCALER_TYPE = sys.argv[1]
print(SCALER_TYPE)


#%% Choose the analysis to run
# Each flag switches one section of this script on or off.
total_effect = True  # total-effect estimation; also requires SCALER_TYPE == 'relative'
just_size = True  # 'Size' channel only; also requires SCALER_TYPE == 'relative'
full_sample = True  # main regression on all entries of strategies_names
just_diversity = True  # 'Entropy'/'Concentration'/'Simpson' channels; also requires SCALER_TYPE == 'relative'
effect_by_year = True  # main regression repeated for each cut-off year 2016..2020
effect_by_sector = True  # main regression repeated for the two sector groups

### Robustness tests
alternative_scopes = False  # re-run with 'Scope 1' and 'Scope 2' instead of emission_type
Downloaded = False  # re-run excluding rows whose data_type is 'crawled'
reported_emissions = False  # re-run on the rows of tc where Reported == 1
alternative_vars = False  # re-run through MN.robs_tables with extra med_controls


#%% Load the input data
# NOTE(review): pickle.load can execute arbitrary code stored in the file;
# only load 'data_1224.pckl' from a trusted source.
# The `controls_` unpacked here replaces the list defined at the top of the script.
with open('data_1224.pckl', 'rb') as data_file:
    golden_data, panel_data, tc, controls_, dummies, new_panel, two_dummies = pickle.load(data_file)

# Keep only the rows with more than two initiatives.
golden_data = golden_data[golden_data.number_of_initiatives > 2]

#%% Total effect (only for the 'relative' scaler)
if total_effect and SCALER_TYPE == 'relative':
    print('Estimating total effect')
    # Uses its own control list (without 'firm_size_t1'), not `controls_`.
    TotalDist, efx, efxL, efxU = MN.make_total_effect(
        emission_type, tc, golden_data, panel_data,
        ['firm_size', 'Tangibility', 'MTB', 'turnover'], dummies,
        scaler_type='relative',
    )
    # Context manager guarantees the output file is flushed and closed.
    with open('output_data/' + SCALER_TYPE + '_TotalEffect' + file_label + '.pckl', 'wb') as out_file:
        pickle.dump([TotalDist, efx, efxL, efxU], out_file)


#%% Mediation analysis on size (only for the 'relative' scaler)
if just_size and SCALER_TYPE == 'relative':
    print('Running size channel')
    # Same call as the main regression, restricted to the single 'Size' channel.
    table, TotalEffectDist, full_distributions, SampleDescr = MN.get_table(
        emission_type, tc, golden_data, panel_data, ['Size'], controls_, dummies,
        scaler_type=SCALER_TYPE, make_total=False,
    )
    # Context manager guarantees the output file is flushed and closed.
    with open('output_data/' + SCALER_TYPE + '_results_Size' + file_label + '.pckl', 'wb') as out_file:
        pickle.dump([table, TotalEffectDist, full_distributions, SampleDescr], out_file)

#%% Mediation analysis on all initiatives
if full_sample:
    print('Running main regression')
    # Main specification: every channel in strategies_names on the full sample.
    table, TotalEffectDist, full_distributions, SampleDescr = MN.get_table(
        emission_type, tc, golden_data, panel_data, strategies_names, controls_, dummies,
        scaler_type=SCALER_TYPE, make_total=False,
    )
    # Context manager guarantees the output file is flushed and closed.
    with open('output_data/' + SCALER_TYPE + '_results_full' + file_label + '.pckl', 'wb') as out_file:
        pickle.dump([table, TotalEffectDist, full_distributions, SampleDescr], out_file)

#%% Just diversity channel (only for the 'relative' scaler)
if just_diversity and SCALER_TYPE == 'relative':
    print('Running diversification channel')
    # Same call as the main regression, restricted to the three diversification measures.
    table, TotalEffectDist, full_distributions, SampleDescr = MN.get_table(
        emission_type, tc, golden_data, panel_data,
        ['Entropy', 'Concentration', 'Simpson'], controls_, dummies,
        scaler_type=SCALER_TYPE, make_total=False,
    )
    # Context manager guarantees the output file is flushed and closed.
    with open('output_data/' + SCALER_TYPE + '_results_RD' + file_label + '.pckl', 'wb') as out_file:
        pickle.dump([table, TotalEffectDist, full_distributions, SampleDescr], out_file)
   

#%% Effect by year
# Repeats the main regression on data up to and including each cut-off year
# (2016..2020) and collects the point estimates and interval bounds parsed
# from the text cells of the returned table.
year_table = dict()
year_distributions = pd.DataFrame()
if effect_by_year:
    years = list(range(2016, 2021))
    ex_res, lb_res, ub_res = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
    tot_eff = []
    tot_effect_name = r'$\tilde{\beta} + \phi\eta$'
    ratio_name = r'$\frac{\phi\eta}{\phi\eta+\tilde{\beta}}$'
    for year in years:
        print('Effect by year:', year)
        yearly_panel_data = panel_data[panel_data.fyear <= year].copy()
        yearly_golden_data = golden_data[golden_data.rfyear <= year].copy()

        # Drop the dummies of the years after the cut-off. A plain (order-
        # preserving) difference is used: the previous symmetric difference
        # would have ADDED any later year that was not already in `dummies`,
        # and list(set(...)) returned the names in arbitrary order.
        to_remove = set(range(year + 1, 2021))
        yearly_dummies = [d for d in dict.fromkeys(dummies) if d not in to_remove]
        #====
        table, TotalEffectDist, full_distributions, SampleDescr = MN.get_table(
            emission_type, tc, yearly_golden_data, yearly_panel_data,
            strategies_names, controls_, yearly_dummies,
            scaler_type=SCALER_TYPE, make_total=False,
        )
        year_table[str(year)] = table
        full_distributions['Year'] = [year] * len(full_distributions)
        year_distributions = pd.concat((year_distributions, full_distributions))

        # Cells are parsed as text of the form 'estimate [lower, upper]';
        # the three parts are kept as strings.
        r = table[ratio_name]
        efx = r.apply(lambda x: x.split(' [')[0])
        lb = r.apply(lambda x: x.split('[')[1].split(',')[0])
        ub = r.apply(lambda x: x.split(']')[0].split(',')[1])

        # Only the first row of the total-effect column is used.
        tot_cell = table[tot_effect_name].iloc[0]
        tot = float(tot_cell.split(' [')[0])
        totL = float(tot_cell.split('[')[1].split(',')[0])
        totU = float(tot_cell.split(']')[0].split(',')[1])

        ex_res = pd.concat((ex_res, efx), axis=1)
        ub_res = pd.concat((ub_res, ub), axis=1)
        lb_res = pd.concat((lb_res, lb), axis=1)
        tot_eff.append([tot, totL, totU])

    # Intermediate dump written before the results are relabelled.
    with open('TMP.pckl', 'wb') as tmp_file:
        pickle.dump([year_table, ex_res, lb_res, ub_res, tot_eff], tmp_file)

    tot_eff = pd.DataFrame(tot_eff, columns=['effect', 'lower_bound', 'upper_bound'], index=list(years))
    ex_res.columns = list(years)
    lb_res.columns = list(years)
    ub_res.columns = list(years)

    # NOTE: TotalEffectDist holds the value returned for the last year only.
    with open('output_data/' + SCALER_TYPE + '_results_year' + file_label + '.pckl', 'wb') as out_file:
        pickle.dump([year_table, tot_eff, ex_res, lb_res, ub_res, year_distributions, TotalEffectDist], out_file)


#%% Effect by sector
# Repeats the main regression separately for two groups of GICS level-1
# sectors; the results are keyed by the first sector of each group and finally
# labelled 'High emissions' / 'Low emissions'.
sector_table = dict()
sector_distributions = pd.DataFrame()
if effect_by_sector:
    ex_res, lb_res, ub_res = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
    tot_eff = []
    sectors = [
        ['Industrial', 'Utilities', 'Material', 'Energy', 'Consumer Staple'],
        ['Health Care', 'Information Technology', 'Communication Services', 'Consumer Discretionary'],
    ]
    group_labels = ['High emissions', 'Low emissions']
    all_sectors = list(golden_data.GICS_level_1.dropna().unique())
    tot_effect_name = r'$\tilde{\beta} + \phi\eta$'
    ratio_name = r'$\frac{\phi\eta}{\phi\eta+\tilde{\beta}}$'
    for count, SEC in enumerate(sectors):
        print('Effect by sectors:', SEC[0])
        # Only golden_data is restricted to the group; panel_data is passed unfiltered.
        sec_golden_data = golden_data[golden_data.GICS_level_1.isin(SEC)].copy()
        #====
        # Drop the dummies of every sector outside the group. Plain (order-
        # preserving) differences are used: the previous symmetric differences
        # would have ADDED names missing from `all_sectors` / `dummies`, and
        # list(set(...)) returned the names in arbitrary order.
        other_sectors = set(all_sectors) - set(SEC)
        sector_dummies = [d for d in dict.fromkeys(dummies) if d not in other_sectors]
        # One sector dummy of each group is removed as well — presumably so it
        # acts as the omitted reference category (TODO confirm).
        if count == 1:
            sector_dummies.remove('Health Care')
        if 'Material' in sector_dummies:
            sector_dummies.remove('Material')
        #====
        table, TotalEffectDist, full_distributions, SampleDescr = MN.get_table(
            emission_type, tc, sec_golden_data, panel_data,
            strategies_names, controls_, sector_dummies,
            scaler_type=SCALER_TYPE, make_total=False,
        )
        sector_table[SEC[0]] = table
        full_distributions['Sector'] = [SEC[0]] * len(full_distributions)
        sector_distributions = pd.concat((sector_distributions, full_distributions))

        # Cells are parsed as text of the form 'estimate [lower, upper]';
        # the three parts are kept as strings.
        r = table[ratio_name]
        efx = r.apply(lambda x: x.split(' [')[0])
        lb = r.apply(lambda x: x.split('[')[1].split(',')[0])
        ub = r.apply(lambda x: x.split(']')[0].split(',')[1])

        # Only the first row of the total-effect column is used.
        tot_cell = table[tot_effect_name].iloc[0]
        tot = float(tot_cell.split(' [')[0])
        totL = float(tot_cell.split('[')[1].split(',')[0])
        totU = float(tot_cell.split(']')[0].split(',')[1])

        ex_res = pd.concat((ex_res, efx), axis=1)
        ub_res = pd.concat((ub_res, ub), axis=1)
        lb_res = pd.concat((lb_res, lb), axis=1)
        tot_eff.append([tot, totL, totU])

    # Intermediate dump written before the results are relabelled.
    with open('TMP.pckl', 'wb') as tmp_file:
        pickle.dump([sector_table, ex_res, lb_res, ub_res, tot_eff], tmp_file)

    tot_eff = pd.DataFrame(tot_eff, columns=['effect', 'lower_bound', 'upper_bound'], index=list(group_labels))
    ex_res.columns = list(group_labels)
    lb_res.columns = list(group_labels)
    ub_res.columns = list(group_labels)

    # NOTE: TotalEffectDist holds the value returned for the last group only.
    with open('output_data/' + SCALER_TYPE + '_results_sector' + file_label + '.pckl', 'wb') as out_file:
        pickle.dump([sector_table, tot_eff, ex_res, lb_res, ub_res, sector_distributions, TotalEffectDist], out_file)



#%%
############################################
############# ROBUSTNESS TESTS #############
############################################
#%% Alternative scopes
# Re-runs the main regression with 'Scope 1' and 'Scope 2' in place of
# emission_type. SampleDescr is returned but not stored in these outputs.
if alternative_scopes:
    for scope, suffix in (('Scope 1', '_scope_1'), ('Scope 2', '_scope_2')):
        print('Running main regression')
        table, TotalEffectDist, full_distributions, SampleDescr = MN.get_table(
            scope, tc, golden_data, panel_data, strategies_names, controls_, dummies,
            scaler_type=SCALER_TYPE, make_total=False,
        )
        # Context manager guarantees the output file is flushed and closed.
        with open('output_data/' + SCALER_TYPE + '_results_full' + file_label + suffix + '.pckl', 'wb') as out_file:
            pickle.dump([table, TotalEffectDist, full_distributions], out_file)

#%% Only downloaded
# Re-runs the main regression without the rows whose data_type is 'crawled'.
# SampleDescr is returned but not stored in this output.
if Downloaded:
    print('Running main regression')
    table, TotalEffectDist, full_distributions, SampleDescr = MN.get_table(
        emission_type, tc, golden_data[golden_data.data_type != 'crawled'], panel_data,
        strategies_names, controls_, dummies,
        scaler_type=SCALER_TYPE, make_total=False,
    )
    # Context manager guarantees the output file is flushed and closed.
    with open('output_data/' + SCALER_TYPE + '_results_download' + file_label + '.pckl', 'wb') as out_file:
        pickle.dump([table, TotalEffectDist, full_distributions], out_file)
#%% Only reported emissions
# Re-runs the main regression on the rows of tc where Reported == 1.
# SampleDescr is returned but not stored in this output.
if reported_emissions:
    print('Running main regression with only reported emissions')
    table, TotalEffectDist, full_distributions, SampleDescr = MN.get_table(
        emission_type, tc[tc.Reported == 1], golden_data, panel_data,
        strategies_names, controls_, dummies,
        scaler_type=SCALER_TYPE, make_total=False,
    )
    # Context manager guarantees the output file is flushed and closed.
    with open('output_data/' + SCALER_TYPE + '_results_reported' + file_label + '.pckl', 'wb') as out_file:
        pickle.dump([table, TotalEffectDist, full_distributions], out_file)

#%% Robustness: alternative mediator controls
# Runs MN.robs_tables twice, with two different `med_controls` lists, on a
# copy of the panel whose 'capx' column is replaced by log(1 + capx).
# SampleDescr is returned but not stored in these outputs.
if alternative_vars:
    print('Running robustness alternative vars')
    pnls = panel_data.copy()
    pnls['capx'] = pnls['capx'].apply(lambda x: np.log(1+x))
    for med_controls, suffix in (
        (['Profitability', 'Leverage'], '_leverage'),
        (['Profitability', 'Leverage', 'capx'], '_capx'),
    ):
        table, TotalEffectDist, full_distributions, SampleDescr = MN.robs_tables(
            emission_type, tc, golden_data, pnls, strategies_names, controls_, dummies,
            med_controls=med_controls,
            scaler_type=SCALER_TYPE, make_total=False,
        )
        # Context manager guarantees the output file is flushed and closed.
        with open('output_data/' + SCALER_TYPE + '_results_rob' + file_label + suffix + '.pckl', 'wb') as out_file:
            pickle.dump([table, TotalEffectDist, full_distributions], out_file)
